/*
This do file takes one simulated dataset (Scenario 1 Sample Size 500, medium survival, large heterogeneity)
and shows the analysis approach adopted in the TSD.

This do file fits standard parametric models
*/
// Move to the example folder. Forward slashes are used throughout because
// Stata accepts them on every platform, whereas a backslash is a directory
// separator on Windows only.
cd "${DRIVE}/GitSoftware/TSD_simulation/Files_for_Appendix/Example"

// stexpect3 calculates marginal expected survival and hazard rates
adopath ++ "../../stexpect3"

use sim1, clear

// Declare the survival data: t is the analysis time and d equal to 1 or 2
// is treated as a failure.
stset t,f(d==1,2)

// Kaplan-Meier plot
sts graph, ytitle("All-cause survival") xtitle("Time from randomisation") 
sts gen S_km = s

// calculate expected survival and hazard functions
// increase observations as this is evaluated at 1000 points.
// set obs cannot shrink a dataset, so only expand when fewer than 1000 rows exist.
if _N < 1000 {
	set obs 1000
}

// Fix the seed so the random diagnosis dates (and hence the expected rates)
// are identical on every run.
set seed 20090101
// diagnosis date: 1 January 2009 plus a uniform draw between 1 and 365 days
gen datediag = mdy(1,1,2009) + runiform(1,365)
// every observation gets the same sex code; passed to stexpect3 through pmother()
gen sex = 1
// NOTE(review): stexpect3 presumably creates expsurv, exphaz and t_exp, which the
// graphs further down the file rely on - confirm against the stexpect3 help file.
stexpect3 using life_tables_1971_2009_england, agediag(age) ///
   datediag(datediag) pmother(sex) pmage(age) pmyear(calendar_year) every(0.2) maxt(80) pmmaxyear(2009) pmmaxage(99)
// rescale the expected hazard to a rate per 1000
replace exphaz = exphaz*1000

// Standard parametric models
// Weibull, log-normal and generalised gamma serve as examples.
// For each distribution: fit the model, store the estimates, predict the
// survival and hazard functions, and keep the formatted AIC in the local
// macro AIC_<distribution> for use in graph legends.
foreach family in weibull lognormal ggamma {
	streg, distribution(`family')
	estimates store streg_`family'

	// fitted survival function
	predict S1_`family', surv

	// fitted hazard function, rescaled to a rate per 1000
	predict h1_`family', hazard
	replace h1_`family' = 1000*h1_`family'

	// information criteria: the fifth column of the r(S) row is kept as the AIC
	estat ic
	tempname ictab
	matrix `ictab' = r(S)
	local AIC_`family': display %4.3f `ictab'[1,5]
}

// Legend text shared by the two graphs; the AIC_* macros must already hold
// the formatted AIC of each fitted model.
local labW "Weibull (AIC=`AIC_weibull')"
local labL "Log-Normal (AIC=`AIC_lognormal')"
local labG "Generalised Gamma (AIC=`AIC_ggamma')"

// Plot survival function
// Kaplan-Meier step function, the three fitted curves and the expected
// survival (dotted, restricted to t_exp<=3), stored as graph surv1.
twoway ///
	(line S_km _t, sort connect(stairstep)) ///
	(line S1_weibull _t, sort) ///
	(line S1_lognormal _t, sort) ///
	(line S1_ggamma _t, sort) ///
	(line expsurv t_exp if t_exp<=3, lcolor(black) lpattern(dot)) ///
	, title("Some Standard Parametric Models") ///
	ytitle("Survival function") ///
	ylabel(0(0.2)1) ///
	legend(order(1 "K-M" 2 "`labW'" 3 "`labL'" 4 "`labG'" 5 "Expected") ///
		pos(7) cols(2) ring(0) size(small)) ///
	name(surv1, replace)

// Plot hazard functions
// The first series is a white dummy K-M line that is left out of the legend;
// it occupies plot position 1 so the models sit in positions 2-4 as in surv1.
// NOTE(review): the dummy series holds survival proportions while the other
// series are rates per 1000; on the log scale this may stretch the y-axis
// below the labelled range - confirm the resulting graph is as intended.
twoway ///
	(line S_km _t, sort connect(stairstep) lcolor(white)) ///
	(line h1_weibull _t, sort) ///
	(line h1_lognormal _t, sort) ///
	(line h1_ggamma _t, sort) ///
	(line exphaz t_exp if t_exp<=3, lcolor(black) lpattern(dot)) ///
	, title("Some Standard Parametric Models") ///
	ytitle("Mortality rate (per 1000 py)") ///
	yscale(log) ///
	ylabel(100 200 500 1000,angle(h)) ///
	legend(order(2 "`labW'" 3 "`labL'" 4 "`labG'" 5 "Expected") ///
		pos(1) cols(2) ring(0) size(small)) ///
	name(haz1, replace)
    
// Estimates of RMST at three years
// Each fitted survival curve is integrated numerically over _t with integ and
// the result is printed with three decimals.
// NOTE(review): integ only covers the range of non-missing _t values, so this
// equals the RMST at 3 years only if the observed times span 0 to 3 - confirm.
foreach dist in weibull lognormal ggamma {
    qui integ S1_`dist' _t
    // a leading space keeps the unit separate from the number in the output
    display "RMST at 3 years for `dist' is: " %5.3f `r(integral)' " years"
}
    
    

// Now plot extrapolated curves to 80 years
// Also calculate mean survival
// All changes are made between preserve and restore, so the data in memory
// are returned to their prior state at the end of this section.
preserve
// keep the observed times so the K-M curve can still be drawn
gen oldt = _t
// replace the st time variables by an evenly spaced grid from 0 to 80
// (one grid point per observation) on which the models are predicted
drop _t
replace _t0 = 0
replace _st=1
range _t 0 80
foreach dist in weibull lognormal ggamma {
	estimates restore streg_`dist'
	predict S2_`dist'_long, surv
	predict h2_`dist'_long, hazard
	// rescale the hazard to a rate per 1000
	replace h2_`dist'_long = h2_`dist'_long*1000
	
}

twoway (line S_km oldt, sort connect(stairstep)) ///
		(line S2_weibull_long _t, sort) ///
		(line S2_lognormal_long _t, sort) ///
		(line S2_ggamma_long _t, sort) ///
		(line expsurv t_exp if t_exp<=80, lpattern(dot) lcolor(black)) ///
		,legend(order(1 "K-M" 2 "Weibull (AIC=`AIC_weibull')" 3 "Log-Normal (AIC=`AIC_lognormal')" 4 "Generalised Gamma (AIC=`AIC_ggamma')" 5 "Expected") pos(2) cols(2) ring(0) size(small)) ///
		title("Standard Models: Extrapolated Survival Functions") ///
		ytitle("Survival function") ///
		name(surv2, replace)

// only plot expected hazard when expected survival is >0.01    
// model hazards are shown from 3 years onwards and only while they are <=2000
twoway 	(line S_km oldt, sort connect(stairstep) color(white)) /// Add a white dummy line for K-M  
		(line h2_weibull_long _t if h2_weibull_long<=2000 & _t>=3, sort) ///
		(line h2_lognormal_long _t if h2_lognormal_long<=2000 & _t>=3, sort) ///
		(line h2_ggamma_long _t if h2_ggamma_long<=2000 & _t>=3, sort) ///
		(line exphaz t_exp if t_exp<=80 & t_exp>=3 & expsurv>0.01, lpattern(dot) lcolor(black)) ///
		,legend(order(2 "Weibull (AIC=`AIC_weibull')" 3 "Log-Normal (AIC=`AIC_lognormal')" 4 "Generalised Gamma (AIC=`AIC_ggamma')" 5 "Expected") pos(2) cols(2) ring(0) size(small)) ///
		title("Standard Models: Extrapolated hazard function") ///
		ytitle("Mortality rate (per 1000 person years)") ///
		name(haz2, replace)			

			

// mean survival (evaluated at 80 years)
// The full variable name S2_<dist>_long is used so that the loop does not
// depend on variable-name abbreviation being switched on.
foreach dist in weibull lognormal ggamma {
  qui integ S2_`dist'_long _t
  // a leading space keeps the unit separate from the number in the output
  display "Mean Survival for `dist' is: " %5.3f `r(integral)' " years"
}

restore